import json

import requests
from lxml import etree
#  pip install requests -i https://pypi.tuna.tsinghua.edu.cn/simple

# Scrape the CET-4 word lists from word.iciba.com (class=11, course 1..226)
# and write one word per line to wordcet4.txt as a JSON object:
#   {"eng": "simple", "proc": "[xxx]", "chi": "简单的"}

BASE_URL = "https://word.iciba.com/?action=words&class=11&course="
LAST_COURSE = 226  # course index runs 1..226 for this word class


def _first_text(element, xpath):
    """Return the stripped text of the first match of *xpath* under *element*.

    Returns None instead of raising IndexError when the page layout is
    missing the expected node (malformed or partial <li> entries).
    """
    hits = element.xpath(xpath)
    return hits[0].strip() if hits else None


# Context manager guarantees the file is flushed and closed even if a
# request or parse step raises partway through the 226-page scrape.
with open('wordcet4.txt', mode="w", encoding="utf-8") as f:
    for course in range(1, LAST_COURSE + 1):
        url = BASE_URL + str(course)

        # timeout keeps a stalled server from hanging the whole run;
        # raise_for_status surfaces 4xx/5xx instead of parsing an error page
        response = requests.get(url, timeout=10)
        response.raise_for_status()

        root = etree.HTML(response.text)
        # Each word is one <li> under the main word list.
        li_list = root.xpath('//ul[@class="word_main_list"]/li')
        print(len(li_list))

        # Extract English word, phonetic transcription, and Chinese meaning.
        for li in li_list:
            word = {
                "eng": _first_text(li, "./div[1]/span/text()"),
                "proc": _first_text(li, "./div[2]/strong/text()"),
                "chi": _first_text(li, "./div[3]/span/text()"),
            }
            # ensure_ascii=False keeps the Chinese text human-readable
            line = json.dumps(word, ensure_ascii=False, indent=None)
            f.write(line + "\n")

# print(help(str))

# # params = {
# #     'query':'昨天天气不错，下午打个球去。',
# #     'from':'zh',
# #     'to': 'en',
# #     'token':'faada14f84ec2b542ab31592f7e44ef7',
# #     'sign':'676834.962771',
# #     'Cookie':'BIDUPSID=E53ECC6A6B63A21497AD16751AF801ED; PSTM=1678155848; BAIDUID=E53ECC6A6B63A214DD1A0672D0BE832F:FG=1; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; H_PS_PSSID=36551_38270_37861_38173_38289_38225_37933_38316_38325_26350_37881; BA_HECTOR=242l8k8ga08l012400810kei1i0it2d1m; delPer=0; PSINO=7; BAIDUID_BFESS=E53ECC6A6B63A214DD1A0672D0BE832F:FG=1; ZFY=u18uCCKF3eiZAvELm6:AP7NqLlmkHKkoW4FK5hFa:AobQ:C; APPGUIDE_10_0_2=1; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1678342511; Hm_lvt_afd111fa62852d1f37001d1f980b6800=1678342570; Hm_lpvt_afd111fa62852d1f37001d1f980b6800=1678342627; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1678342628; ab_sr=1.0.1_ODZmNWFjOGQzYjBlMmE1YjI4NzVmN2QwMzI1ZDkyMDQ4NzA2NDIzMjUxMDhiMTllOTFiZDU5NTIzM2I5ZGEzN2MzMzNmNzcxYTQxNmQ5NmFhZTk2NmUzYzliMjY0Njk1YWM5MjU1NWE4YWFjNDU1MDIzZWEyOGM3NDM3Y2Y4NDU4NjVlM2MwZWNmM2I3NmE0OWQ1NTgxNjAxMWI3ODcwNQ=='
# # }
#
# params = {
#     'kw':'昨天天气不错，下午打个球去。'
# }
# headers = {
#     'User-Agent':'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1'
# }
# response = requests.post(url,params=params,headers=headers)
# # response = requests.get('https://www.ifeng.com')
# content = response.content
# text = response.text
# print(type(content))
# print(type(text))
#
#
# print(text)

# f = open('a.txt',mode="a")
# f.write(text)

